In [1]:
import requests
import pandas as pd
In [2]:
# Download the raw iris data file from the UCI archive.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error (4xx/5xx) into an exception so that an
# error page is never mistaken for the dataset by the cells that follow.
r = requests.get(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    timeout=30,
)
r.raise_for_status()
`r` is the response object returned by `requests.get`.
In [3]:
# Show the response headers, then the first 100 characters of the body,
# each on its own line.
print(r.headers, r.text[:100], sep='\n')
In [4]:
# Save the downloaded text to a local file so it can be loaded with pandas.
# The encoding is given explicitly: without it, open() falls back to the
# platform's default encoding, which is not UTF-8 on every system.
with open('iris.data', 'w', encoding='utf-8') as fp:
    fp.write(r.text)
In [5]:
# Load the saved file into a DataFrame; names= supplies the column labels.
df = pd.read_csv(
    'iris.data',
    names=[
        'sepal length',
        'sepal width',
        'petal length',
        'petal width',
        'class',
    ],
)
In [6]:
# Preview the first five rows of the frame.
df.head(n=5)
Out[6]:
In [7]:
# First ten entries of the 'sepal length' column, selected by position.
df['sepal length'].iloc[:10]
Out[7]:
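Another way to slice the data is the `.ix[row, column]` notation. Let's use it to select the first four rows of the second column (column position 1). Here `:3` means rows 0 through 3 inclusive, because `.ix` slices an integer index by label. Note that `.ix` was deprecated in pandas 0.20 and removed in pandas 1.0; on current pandas the equivalent selection is `.iloc[:4, 1]`.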
In [8]:
# Rows 0-3 of the second column (position 1).
# The .ix indexer was removed in pandas 1.0, so positional .iloc is used instead.
# .iloc slices exclude the end position, hence :4 to include row 3.
df.iloc[:4, 1]
Out[8]:
In [9]:
# Keep only the rows whose class is 'Iris-setosa' and whose sepal width exceeds 3.5.
df.loc[df['class'].eq('Iris-setosa') & df['sepal width'].gt(3.5)]
Out[9]:
In [10]:
# Summary statistics for the 'Iris-setosa' rows with sepal width above 3.5.
is_setosa = df['class'] == 'Iris-setosa'
has_wide_sepal = df['sepal width'] > 3.5
df[is_setosa & has_wide_sepal].describe()
Out[10]:
In [11]:
# Pairwise correlation between the numeric measurement columns.
# The string-valued 'class' column is excluded first: recent pandas versions
# raise when corr() is asked to handle non-numeric data, whereas older versions
# silently dropped such columns. Selecting by dtype works on both.
df.select_dtypes(include='number').corr()
Out[11]:
`df.corr()` shows the pairwise correlation between the numeric columns.
In [12]:
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
import numpy as np
In [13]:
# Histogram of the 'petal width' column on a single axes.
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(df['petal width'], color='black')
ax.set_xlabel('Width', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
# The title is set on the axes directly; y=1.01 is its vertical position in axes coordinates.
ax.set_title('Iris Petal Width', fontsize=14, y=1.01)
Out[13]:
In [14]:
# 2x2 grid of histograms, one panel per measurement column.
fig, ax = plt.subplots(2, 2, figsize=(6, 4))

# (axes, column to plot, x-axis label, panel title) for each panel,
# listed in the order the panels are drawn.
panels = [
    (ax[0][0], 'petal width', 'Width', 'Iris Petal Width'),
    (ax[0][1], 'petal length', 'Length', 'Iris Petal Length'),
    (ax[1][0], 'sepal width', 'Width', 'Iris Sepal Width'),
    (ax[1][1], 'sepal length', 'Length', 'Iris Sepal Length'),
]

for panel, column, xlabel, title in panels:
    panel.hist(df[column], color='black')
    panel.set_ylabel('Count', fontsize=12)
    panel.set_xlabel(xlabel, fontsize=12)
    panel.set_title(title, fontsize=14, y=1.01)

# Adjust subplot spacing once all panels are drawn.
plt.tight_layout()
In [15]:
# Scatter plot of petal width (x) against petal length (y).
fig, ax = plt.subplots(figsize=(6, 6))
petal_width = df['petal width']
petal_length = df['petal length']
ax.scatter(petal_width, petal_length, color='green')
ax.set_ylabel('Petal Length')
ax.set_xlabel('Petal Width')
ax.set_title('Petal Scatterplot')
Out[15]:
In [16]:
# Line plot of the 'petal length' column.
fig, ax = plt.subplots(figsize=(6, 6))
petal_length = df['petal length']
ax.plot(petal_length, color='blue')
ax.set_ylabel('Petal Length')
ax.set_xlabel('Specimen Number')
ax.set_title('Petal Length Plot')
Out[16]:
In [17]:
import seaborn as sns

# Pairwise plots of the columns in df, coloured by the 'class' column.
sns.pairplot(data=df, hue='class')
Out[17]:
In [18]:
# 2x2 grid of violin plots: one measurement per panel, grouped by 'class'.
fig, ax = plt.subplots(2, 2, figsize=(7, 7))
sns.set(style='white', palette='muted')

# ax.flat walks the grid row by row, so the columns below land on
# ax[0,0], ax[0,1], ax[1,0] and ax[1,1] in that order.
measurements = ['sepal length', 'sepal width', 'petal length', 'petal width']
for panel, column in zip(ax.flat, measurements):
    sns.violinplot(x=df['class'], y=df[column], ax=panel)

fig.suptitle('Violin Plots', fontsize=16, y=1.03)

# Rotate the x tick labels on every panel.
for panel in ax.flat:
    plt.setp(panel.get_xticklabels(), rotation=-90)

fig.tight_layout()
In [ ]: